#!/bin/bash

# Builds and self-tests a Mahout Naive Bayes classifier from the text files
# stored in HDFS under ${WORK_DIR}/twitter/text.
#
# Usage: run from a directory two levels below the Mahout home; the script
# changes to ../.. and then invokes ./bin/mahout from there.
#
# Environment:
#   c  Optional extra option(s) appended to the trainnb and testnb calls.
#      Defaults to empty when unset.
#      NOTE(review): presumably "-c" to select complementary Naive Bayes,
#      as in the stock Mahout example scripts -- confirm.

# Abort on the first failing stage so that later stages never run against
# stale or missing output from an earlier one.
set -euo pipefail

START_PATH=$(pwd)
cd "${START_PATH}/../.." || {
  echo "Cannot change to ${START_PATH}/../.." >&2
  exit 1
}

readonly WORK_DIR=/user/jaubertb
readonly DATA_DIR="${WORK_DIR}/classification"
readonly SEQ_DIR="${WORK_DIR}/classification-seq"
readonly VECTORS_DIR="${WORK_DIR}/classification-vectors"
# seq2sparse writes several sub-directories (dictionary, counts, tf-vectors,
# ...) below its output path; the classifier consumes the TF-IDF vectors only.
readonly TFIDF_DIR="${VECTORS_DIR}/tfidf-vectors"

# Extra trainnb/testnb option(s) taken from the caller's environment.
nb_extra_opts=${c:-}

echo "Preparing data"
# -f: do not fail when the directory does not exist yet (first run).
hadoop fs -rm -r -f "${DATA_DIR}"
hadoop fs -mkdir -p "${DATA_DIR}"
# The glob is quoted so that it is expanded by HDFS, not by the local shell.
hadoop fs -cp "${WORK_DIR}/twitter/text/*/*/*" "${DATA_DIR}"

echo "Creating sequence files from classification data"
./bin/mahout seqdirectory \
  -i "${DATA_DIR}" \
  -o "${SEQ_DIR}" -ow -xm sequential

echo "Converting sequence files to vectors"
# Input is the sequence-file directory produced by the previous step, not the
# raw text directory.
./bin/mahout seq2sparse \
  -i "${SEQ_DIR}" \
  -o "${VECTORS_DIR}" -lnorm -nv -wt tfidf

echo "Training Naive Bayes model"
# ${nb_extra_opts} is intentionally unquoted: an empty value must vanish and
# a multi-word value must split into separate arguments.
# shellcheck disable=SC2086
./bin/mahout trainnb \
  -i "${TFIDF_DIR}" -el \
  -o "${WORK_DIR}/model" \
  -li "${WORK_DIR}/labelindex" \
  -ow ${nb_extra_opts}

echo "Self testing on training set"
# shellcheck disable=SC2086
./bin/mahout testnb \
  -i "${TFIDF_DIR}" \
  -m "${WORK_DIR}/model" \
  -l "${WORK_DIR}/labelindex" \
  -ow -o "${WORK_DIR}/classification-testing" ${nb_extra_opts}